import pandas as pd
import os
# Read the source Excel workbook from the current working directory.
# os.path.join is used instead of a hard-coded '\\' separator so the path is
# also valid on non-Windows platforms.
file_path = os.path.join(os.getcwd(), 'gptdata.xlsx')
df = pd.read_excel(file_path)

# Clean the data: keep only the rows whose text column contains the keyword.
keyword = 'gpt'
text_column = 'weibotext'  # name of the column that is searched for the keyword
column_names = df.columns.tolist()
print(column_names)  # list the available columns before the text column is accessed

# Rows with no text cannot match the keyword, so drop them first.
df.dropna(subset=[text_column], inplace=True)

# Coerce the remaining values to str before using the .str accessor: a column
# holding non-string cells (e.g. numbers) would otherwise produce NaN entries
# in the mask, or make the .str accessor fail outright.
# The column is lowercased in place, so the exported text is lowercase too.
df[text_column] = df[text_column].astype(str).str.lower()

# regex=False matches the keyword literally; na=False treats any missing
# value as "no match" so the result is always a clean boolean mask.
keyword_mask = df[text_column].str.contains(
    keyword, case=False, regex=False, na=False
)
dffiltered_df = df[keyword_mask]

# Write the filtered rows to a new Excel workbook in the working directory.
output_file_path = os.path.join(os.getcwd(), '清洗后的文件.xlsx')
dffiltered_df.to_excel(output_file_path, index=False)

print("清洗完成，生成的文件路径为:", output_file_path)